import pandas as pd
import numpy as np
#IMPORTING THE DATASET
# Load the streaming TV-show catalogue (Netflix / Hulu / Prime Video / Disney+
# availability flags plus IMDb and Rotten Tomatoes ratings).
tvshows_df = pd.read_csv('tv_shows.csv')
# Bare expression: notebook-style display of the raw frame.
tvshows_df
| Unnamed: 0 | Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | type | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96% | 1 | 0 | 0 | 0 | 1 |
| 1 | 1 | Stranger Things | 2016 | 16+ | 8.8 | 93% | 1 | 0 | 0 | 0 | 1 |
| 2 | 2 | Money Heist | 2017 | 18+ | 8.4 | 91% | 1 | 0 | 0 | 0 | 1 |
| 3 | 3 | Sherlock | 2010 | 16+ | 9.1 | 78% | 1 | 0 | 0 | 0 | 1 |
| 4 | 4 | Better Call Saul | 2015 | 18+ | 8.7 | 97% | 1 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5606 | 5606 | Tut's Treasures: Hidden Secrets | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5607 | 5607 | Paradise Islands | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5608 | 5608 | Wild Russia | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5609 | 5609 | Love & Vets | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5610 | 5610 | United States of Animals | 2016 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
5611 rows × 11 columns
#DATA CLEANING
# Keep only rows where Age, IMDb and Rotten Tomatoes are all present, then
# drop the bookkeeping columns. dropna(subset=...) replaces the original three
# chained notna() filters, and reset_index(drop=True) replaces the
# reset_index() + drop(columns=['index']) round trip.
new_df = tvshows_df.dropna(subset=['IMDb', 'Age', 'Rotten Tomatoes'])
new_df = new_df.reset_index(drop=True).drop(columns=['Unnamed: 0', 'type'])
#CONVERTING THE NUMERICAL COLUMN TO Decipherable type
# IMDb is already numeric; Rotten Tomatoes is a "96%"-style string, so strip
# the trailing '%' and cast to float using vectorised pandas string ops
# (replaces the original np.char.strip round trip through Python lists).
IMDb_rating = new_df.IMDb.to_numpy(dtype='float64')
RT_rating = new_df['Rotten Tomatoes'].str.rstrip('%').astype('float64').to_numpy()
new_df['Rotten Tomatoes'] = RT_rating
#cleaned data frame
new_df
| Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96.0 | 1 | 0 | 0 | 0 |
| 1 | Stranger Things | 2016 | 16+ | 8.8 | 93.0 | 1 | 0 | 0 | 0 |
| 2 | Money Heist | 2017 | 18+ | 8.4 | 91.0 | 1 | 0 | 0 | 0 |
| 3 | Sherlock | 2010 | 16+ | 9.1 | 78.0 | 1 | 0 | 0 | 0 |
| 4 | Better Call Saul | 2015 | 18+ | 8.7 | 97.0 | 1 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 926 | Diary of a Future President | 2020 | 7+ | 5.5 | 100.0 | 0 | 0 | 0 | 1 |
| 927 | Encore! | 2019 | 7+ | 7.4 | 68.0 | 0 | 0 | 0 | 1 |
| 928 | Spider-Man Unlimited | 1999 | 7+ | 6.5 | 50.0 | 0 | 0 | 0 | 1 |
| 929 | The Super Hero Squad Show | 2009 | 7+ | 6.1 | 50.0 | 0 | 0 | 0 | 1 |
| 930 | Marvel's Hero Project | 2019 | 7+ | 6.1 | 92.0 | 0 | 0 | 0 | 1 |
931 rows × 9 columns
# Row(s) carrying the lowest IMDb rating in the cleaned data.
new_df.loc[new_df['IMDb'] == new_df['IMDb'].min()]
| Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|---|---|---|
| 732 | A Little Late with Lilly Singh | 2019 | 16+ | 1.7 | 90.0 | 0 | 1 | 0 | 0 |
# Row(s) carrying the highest IMDb rating in the cleaned data.
new_df.loc[new_df['IMDb'] == new_df['IMDb'].max()]
| Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96.0 | 1 | 0 | 0 | 0 |
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
# Jupyter magic: render figures inline in the notebook.
%matplotlib inline
# Global plotting defaults: darkgrid theme, larger font, 9x5 transparent figures.
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 15
matplotlib.rcParams['figure.figsize'] = (9, 5)
matplotlib.rcParams['figure.facecolor'] = '#00000000'
#PLOTTING HISTOGRAM
# Histogram of release years (5 bins), with the raw count annotated above each bar.
fig, ax = plt.subplots(figsize=(12, 9))
ax.hist(new_df.Year, bins=5, density=False)
plt.xlabel("Year")
# BUG FIX: the y-axis shows per-bin counts, not Rotten Tomatoes scores, so the
# original ylabel("Rotten Tomatoes") was mislabeling the plot.
plt.ylabel("Number of TV shows")
plt.title("Year")
for rect in ax.patches:
    height = rect.get_height()
    # Place the bin count just above the top edge of each bar.
    ax.annotate(f'{int(height)}', xy=(rect.get_x() + rect.get_width() / 2, height),
                xytext=(0, 5), textcoords='offset points', ha='center', va='bottom')
plt.show()
# Summary statistics of the cleaned frame.
new_df.describe()
| Year | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|---|
| count | 931.000000 | 931.000000 | 931.000000 | 931.000000 | 931.000000 | 931.000000 | 931.000000 |
| mean | 2013.093448 | 7.534801 | 77.823845 | 0.446831 | 0.375940 | 0.227712 | 0.024705 |
| std | 7.606510 | 0.952062 | 20.369783 | 0.497432 | 0.484625 | 0.419581 | 0.155307 |
| min | 1959.000000 | 1.700000 | 6.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 2011.000000 | 7.100000 | 67.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 50% | 2015.000000 | 7.700000 | 84.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 75% | 2018.000000 | 8.200000 | 93.000000 | 1.000000 | 1.000000 | 0.000000 | 0.000000 |
| max | 2020.000000 | 9.500000 | 100.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
#rating comparison using the boxplot
# Side-by-side boxplots of the two rating systems; IMDb (0-10 scale) is
# multiplied by 10 so both distributions share a common 0-100 axis.
fig,ax = plt.subplots()
bp_data=[IMDb_rating*10,RT_rating]
ax.boxplot(bp_data)
ax.set_xticklabels(["IMDb","Rotten tomatoes"])
plt.ylabel("Relative Ratings(Scale 0-100)")
plt.title("Ratings ")
plt.show()
#Z-Score Computation Function
def z_score(df):
    """Return a copy of *df* with every column standardised to zero mean and unit std.

    The input frame is not modified; each column is replaced by
    (value - column mean) / column std in the returned copy.
    """
    standardized = df.copy()
    for name in standardized.columns:
        col = standardized[name]
        standardized[name] = (col - col.mean()) / col.std()
    return standardized
# Standardise the three numeric columns and verify the resulting moments.
new_df_numeric = new_df[["Year","IMDb","Rotten Tomatoes"]]
df_normalized = z_score(new_df_numeric)
#year normalisation
# After z-scoring, mean should be ~0 and std ~1 (up to float rounding).
print("Normalized Curve For Year:-")
print("Mean :",df_normalized.Year.mean())
print("Standard Deviation :",df_normalized.Year.std())
Normalized Curve For Year:- Mean : 1.240480447804325e-14 Standard Deviation : 1.0000000000000013
#IMDb normalisation
# Sanity check: standardised IMDb column should have mean ~0 and std ~1.
print("Normalized Curve For IMDb Rating:")
print("Mean :",df_normalized.IMDb.mean())
print("Standard Deviation :",df_normalized.IMDb.std())
Normalized Curve For IMDb Rating: Mean : 6.537794532980621e-15 Standard Deviation : 0.9999999999999976
#rotten tomatoes normalisation
# Sanity check: standardised Rotten Tomatoes column should have mean ~0, std ~1.
print("Normalized Curve For Rotten Tomatoes Rating:")
print("Mean :",df_normalized['Rotten Tomatoes'].mean())
print("Standard Deviation :",df_normalized['Rotten Tomatoes'].std())
Normalized Curve For Rotten Tomatoes Rating: Mean : -9.671223125359849e-17 Standard Deviation : 0.9999999999999997
# Pairwise correlation matrix over all numeric columns of the cleaned frame.
new_df.corr()
| Year | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | |
|---|---|---|---|---|---|---|---|
| Year | 1.000000 | -0.233711 | -0.063455 | 0.183902 | -0.216642 | -0.204441 | -0.016520 |
| IMDb | -0.233711 | 1.000000 | 0.481183 | 0.008906 | -0.005082 | 0.082427 | -0.019638 |
| Rotten Tomatoes | -0.063455 | 0.481183 | 1.000000 | 0.021997 | 0.017935 | -0.016312 | 0.026189 |
| Netflix | 0.183902 | 0.008906 | 0.021997 | 1.000000 | -0.568219 | -0.405600 | -0.143042 |
| Hulu | -0.216642 | -0.005082 | 0.017935 | -0.568219 | 1.000000 | -0.252236 | -0.080669 |
| Prime Video | -0.204441 | 0.082427 | -0.016312 | -0.405600 | -0.252236 | 1.000000 | -0.086422 |
| Disney+ | -0.016520 | -0.019638 | 0.026189 | -0.143042 | -0.080669 | -0.086422 | 1.000000 |
# Relationship between the two rating systems before outlier removal:
# correlation plus a regression scatter plot.
subset_df=new_df[['IMDb','Rotten Tomatoes']]
subset_df.corr()
sns.regplot(x="IMDb", y="Rotten Tomatoes", data=subset_df);
#IMDb rating using coorelation analysis (min and max without outliers)
# Tukey fences: values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] count as outliers;
# the fences are clamped to the observed min/max of the data.
IMDb_rating = np.array(new_df.IMDb)
q1, q3 = np.percentile(IMDb_rating, [25, 75])
imdb_iqr = q3 - q1
min_IMDb_no_outliers = max(q1 - 1.5 * imdb_iqr, IMDb_rating.min())
max_IMDb_no_outliers = min(q3 + 1.5 * imdb_iqr, IMDb_rating.max())
print("Minimum IMDb rating without outliers is ",min_IMDb_no_outliers)
print("Maximum IMDb rating without outliers is ",max_IMDb_no_outliers)
Minimum IMDb rating without outliers is 5.45 Maximum IMDb rating without outliers is 9.5
#Rotten tomatoes rating using correlation comparison (min and max without outliers)
# Same Tukey-fence computation as for IMDb, applied to Rotten Tomatoes scores.
RT_rating = np.array(new_df['Rotten Tomatoes'])
q1, q3 = np.percentile(RT_rating, [25, 75])
rt_iqr = q3 - q1
min_RT_no_outliers = max(q1 - 1.5 * rt_iqr, RT_rating.min())
max_RT_no_outliers = min(q3 + 1.5 * rt_iqr, RT_rating.max())
print("Minimum Rotten Tomatoes rating without outliers is ",min_RT_no_outliers)
print("Maximum Rotten Tomatoes rating without outliers is ",max_RT_no_outliers)
Minimum Rotten Tomatoes rating without outliers is 28.0 Maximum Rotten Tomatoes rating without outliers is 100.0
#removing the outliers
# Keep only rows whose ratings fall inside both Tukey fences.
# Series.between() is inclusive on both ends, matching the original >=/<= chain.
subset_df = new_df[['IMDb', 'Rotten Tomatoes']]
in_range = (
    subset_df.IMDb.between(min_IMDb_no_outliers, max_IMDb_no_outliers)
    & subset_df['Rotten Tomatoes'].between(min_RT_no_outliers, max_RT_no_outliers)
)
subset_df = subset_df[in_range]
subset_df
| IMDb | Rotten Tomatoes | |
|---|---|---|
| 0 | 9.5 | 96.0 |
| 1 | 8.8 | 93.0 |
| 2 | 8.4 | 91.0 |
| 3 | 9.1 | 78.0 |
| 4 | 8.7 | 97.0 |
| ... | ... | ... |
| 926 | 5.5 | 100.0 |
| 927 | 7.4 | 68.0 |
| 928 | 6.5 | 50.0 |
| 929 | 6.1 | 50.0 |
| 930 | 6.1 | 92.0 |
879 rows × 2 columns
#plotting graph for the without outliers
# Regression scatter plot and correlation recomputed after outlier removal.
sns.regplot(x="IMDb", y="Rotten Tomatoes", data=subset_df);
subset_df.corr()
| IMDb | Rotten Tomatoes | |
|---|---|---|
| IMDb | 1.000000 | 0.448523 |
| Rotten Tomatoes | 0.448523 | 1.000000 |
import numpy as np
import pandas as pd
# Second analysis section: reload the raw dataset from scratch.
tv_shows = pd.read_csv('tv_shows.csv')
tv_shows
| Unnamed: 0 | Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | type | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96% | 1 | 0 | 0 | 0 | 1 |
| 1 | 1 | Stranger Things | 2016 | 16+ | 8.8 | 93% | 1 | 0 | 0 | 0 | 1 |
| 2 | 2 | Money Heist | 2017 | 18+ | 8.4 | 91% | 1 | 0 | 0 | 0 | 1 |
| 3 | 3 | Sherlock | 2010 | 16+ | 9.1 | 78% | 1 | 0 | 0 | 0 | 1 |
| 4 | 4 | Better Call Saul | 2015 | 18+ | 8.7 | 97% | 1 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5606 | 5606 | Tut's Treasures: Hidden Secrets | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5607 | 5607 | Paradise Islands | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5608 | 5608 | Wild Russia | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5609 | 5609 | Love & Vets | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5610 | 5610 | United States of Animals | 2016 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
5611 rows × 11 columns
#data cleaning
# De-duplicate titles, coerce both rating columns to numbers (missing ratings
# become 0), then reshape the four platform flag columns into long format:
# one row per (title, platform) pair where the show is available.
tv_shows.drop_duplicates(subset='Title',keep='first',inplace=True)
# Vectorised .str.rstrip replaces the original fillna + apply(lambda x: x.rstrip('%')) chain.
tv_shows['Rotten Tomatoes'] = pd.to_numeric(
    tv_shows['Rotten Tomatoes'].fillna('0%').str.rstrip('%'))
# Put IMDb on a 0-100 integer scale to match Rotten Tomatoes.
tv_shows['IMDb'] = (tv_shows['IMDb'].fillna(0) * 10).astype('int')
tv_shows_long = pd.melt(tv_shows[['Title','Netflix','Hulu','Disney+','Prime Video']],
                        id_vars=['Title'], var_name='StreamingOn', value_name='Present')
# Keep only rows where the show is actually carried by the platform.
tv_shows_long = tv_shows_long[tv_shows_long['Present'] == 1]
tv_shows_long.drop(columns=['Present'],inplace=True)
#merging datasets
# Attach the full show attributes to every (title, platform) row, then drop
# the per-platform 0/1 flags (now encoded in StreamingOn) and bookkeeping columns.
tv_shows_combined = tv_shows_long.merge(tv_shows, on='Title', how='inner')
tv_shows_combined.drop(columns = ['Unnamed: 0','Netflix','Hulu', 'Prime Video', 'Disney+', 'type'], inplace=True)
#Subsetting.The datasets with IMDB ratings/ Rotten Tomatoes ratings above 0 needs to be considered for plotting.
# BUG FIX: '&' binds tighter than '>', so the original unparenthesised
# expression parsed as ((IMDb > 0) & RT) > 0 — a bitwise AND of a boolean with
# the raw Rotten Tomatoes scores. Both comparisons must be parenthesised so
# only rows rated on BOTH sites are kept.
tv_shows_both_ratings = tv_shows_combined[
    (tv_shows_combined.IMDb > 0) & (tv_shows_combined['Rotten Tomatoes'] > 0)
]
tv_shows_both_ratings
| Title | StreamingOn | Year | Age | IMDb | Rotten Tomatoes | |
|---|---|---|---|---|---|---|
| 1 | Stranger Things | Netflix | 2016 | 16+ | 88 | 93 |
| 2 | Money Heist | Netflix | 2017 | 18+ | 84 | 91 |
| 4 | Better Call Saul | Netflix | 2015 | 18+ | 87 | 97 |
| 5 | The Office | Netflix | 2005 | 16+ | 89 | 81 |
| 6 | Black Mirror | Netflix | 2011 | 18+ | 88 | 83 |
| ... | ... | ... | ... | ... | ... | ... |
| 4622 | Prime Suspect: Tennison | Prime Video | 2017 | 16+ | 75 | 47 |
| 4652 | Great Expectations | Prime Video | 1998 | 18+ | 72 | 37 |
| 4695 | The Simple Life | Prime Video | 2003 | 16+ | 38 | 35 |
| 4705 | Flash Gordon | Prime Video | 2007 | 7+ | 48 | 15 |
| 4820 | Tokyo Vampire Hotel | Prime Video | 2018 | NaN | 61 | 63 |
474 rows × 6 columns
# Bar chart of the number of titles carried by each streaming platform.
tv_shows_combined.groupby('StreamingOn').Title.count().plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x29176514348>
#Correlation matrix
import seaborn as sns
import matplotlib.pyplot as plt
# BUG FIX: 'df' is not defined at this point in the notebook (it is only
# created much later, in a separate section); the frame being analysed in this
# section is tv_shows_combined.
corrmat = tv_shows_combined.corr()
fig = plt.figure(figsize = (12, 9))
sns.heatmap(corrmat, vmax = .8, square = True, annot = True)
plt.show()
#k clustering
import pandas as pd
# Third analysis section: reload the raw dataset for K-Means clustering.
DF = pd.read_csv('tv_shows.csv', encoding = 'utf-8')
DF
| Unnamed: 0 | Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | type | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96% | 1 | 0 | 0 | 0 | 1 |
| 1 | 1 | Stranger Things | 2016 | 16+ | 8.8 | 93% | 1 | 0 | 0 | 0 | 1 |
| 2 | 2 | Money Heist | 2017 | 18+ | 8.4 | 91% | 1 | 0 | 0 | 0 | 1 |
| 3 | 3 | Sherlock | 2010 | 16+ | 9.1 | 78% | 1 | 0 | 0 | 0 | 1 |
| 4 | 4 | Better Call Saul | 2015 | 18+ | 8.7 | 97% | 1 | 0 | 0 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5606 | 5606 | Tut's Treasures: Hidden Secrets | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5607 | 5607 | Paradise Islands | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5608 | 5608 | Wild Russia | 2018 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5609 | 5609 | Love & Vets | 2017 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
| 5610 | 5610 | United States of Animals | 2016 | NaN | NaN | NaN | 0 | 0 | 0 | 1 | 1 |
5611 rows × 11 columns
import numpy as np
from sklearn import preprocessing
DF_ARRAY = np.array(DF.iloc[:,6:11]) # Getting only the numeric features from the dataset
# NOTE(review): columns 6:11 are Netflix, Hulu, Prime Video, Disney+ and
# 'type'. The output shows 'type' is constant 1 here, so it contributes no
# clustering signal — confirm whether it was meant to be included.
# preprocessing.normalize scales each ROW to unit L2 norm (not per-column
# standardisation), which is why the 0/1 flag rows become 0.707... pairs.
DF_NORM = preprocessing.normalize(DF_ARRAY) # Normalizing the data
DF_NORM
array([[0.70710678, 0. , 0. , 0. , 0.70710678],
[0.70710678, 0. , 0. , 0. , 0.70710678],
[0.70710678, 0. , 0. , 0. , 0.70710678],
...,
[0. , 0. , 0. , 0.70710678, 0.70710678],
[0. , 0. , 0. , 0.70710678, 0.70710678],
[0. , 0. , 0. , 0.70710678, 0.70710678]])
from sklearn.cluster import KMeans
# Creating our Model
# Initial trial with an arbitrary K=3 (the elbow analysis below revisits K).
kmeans = KMeans(n_clusters = 3)
# Training our model
kmeans.fit(DF_NORM)
KMeans(n_clusters=3)
import matplotlib.pyplot as plt
# Elbow method: fit K-Means for each candidate K and record the inertia
# (within-cluster sum of squares); the bend in the curve suggests a good K.
# Amount of values to be tested for K
Ks = range(6, 11)
# List to hold on the metrics for each value of K
results = []
# Executing the loop
for K in Ks:
    model = KMeans(n_clusters = K)
    model.fit(DF_NORM)
    results.append(model.inertia_)
# Plotting the final result
plt.plot(Ks, results, 'o-')
plt.xlabel("Values of K")
# BUG FIX: the y-axis plots model.inertia_, not "tv_shows".
plt.ylabel("Inertia")
plt.show()
from sklearn.cluster import KMeans
# Creating our Model
# Final model with K=4 clusters.
kmeans = KMeans(n_clusters = 4)
# Training our model
kmeans.fit(DF_NORM)
# You can see the labels (clusters) assigned for each data point with the function labels_
kmeans.labels_
# Assigning the labels to the initial dataset
DF['cluster'] = kmeans.labels_
from matplotlib import pylab
from pylab import *
from sklearn.decomposition import PCA
import pylab as pl
# Reducing data dimensions
# Project the 5-D feature rows onto their first two principal components so
# the clusters can be drawn in 2-D.
# NOTE(review): the PCA is fitted on DF_ARRAY (raw flags) while K-Means was
# trained on DF_NORM (row-normalised) — confirm this mismatch is intentional.
PCA_ = PCA(n_components = 2).fit(DF_ARRAY)
# Applying the PCA
PCA_2 = PCA_.transform(DF_ARRAY)
#--------------------------
# Plotting the cluster
#--------------------------
# Plot size
pylab.rcParams['figure.figsize'] = (8.0, 8.0)
# Plotting each point individually depending on their cluster
# (one scatter call per point; CLUSTER_0x keeps a handle for the legend).
for i in range(0, PCA_2.shape[0]):
# If the 'i' data point be in cluster 0, it will be plotted as the formatting inside the if functions
# And so on...
if kmeans.labels_[i] == 0:
CLUSTER_01 = pl.scatter(PCA_2[i,0], PCA_2[i,1], c ='r', marker = 'o', s = 120)
elif kmeans.labels_[i] == 1:
CLUSTER_02 = pl.scatter(PCA_2[i,0], PCA_2[i,1], c ='g', marker = 'o', s = 120)
elif kmeans.labels_[i] == 2:
CLUSTER_03 = pl.scatter(PCA_2[i,0], PCA_2[i,1], c ='b', marker = 'o', s = 120)
elif kmeans.labels_[i] == 3:
CLUSTER_04 = pl.scatter(PCA_2[i,0], PCA_2[i,1], c ='y', marker = 'o', s = 120)
# Formatting the Plot
# NOTE(review): if any cluster is empty, its CLUSTER_0x handle is never bound
# and the legend call below raises NameError — verify against the data.
pl.legend([CLUSTER_01, CLUSTER_02, CLUSTER_03, CLUSTER_04],
['Cluster 01', 'Cluster 02', 'Cluster 03', 'Cluster 04'])
pl.title('TV_SHOWS')
pl.show()
import pandas as pd
import numpy as np
import missingno as msno
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import warnings
# Silence library warnings for cleaner notebook output.
warnings.filterwarnings('ignore')
%matplotlib inline
# Fourth analysis section: reload the raw data for missing-value EDA and plots.
df=pd.read_csv("tv_shows.csv")
df.head()
| Unnamed: 0 | Title | Year | Age | IMDb | Rotten Tomatoes | Netflix | Hulu | Prime Video | Disney+ | type | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Breaking Bad | 2008 | 18+ | 9.5 | 96% | 1 | 0 | 0 | 0 | 1 |
| 1 | 1 | Stranger Things | 2016 | 16+ | 8.8 | 93% | 1 | 0 | 0 | 0 | 1 |
| 2 | 2 | Money Heist | 2017 | 18+ | 8.4 | 91% | 1 | 0 | 0 | 0 | 1 |
| 3 | 3 | Sherlock | 2010 | 16+ | 9.1 | 78% | 1 | 0 | 0 | 0 | 1 |
| 4 | 4 | Better Call Saul | 2015 | 18+ | 8.7 | 97% | 1 | 0 | 0 | 0 | 1 |
# Visualize missing values as a matrix
# (missingno matrix: white gaps mark missing cells, row by row).
msno.matrix(df)
<matplotlib.axes._subplots.AxesSubplot at 0x2917f2d56c8>
# Bar chart of non-null counts per column.
msno.bar(df)
<matplotlib.axes._subplots.AxesSubplot at 0x2917f332488>
# Print, for every column that contains nulls, its percentage of missing rows.
total_rows = len(df)
for column in df.columns:
    null_rate = df[column].isnull().sum() / total_rows * 100
    if null_rate > 0:
        print("{}'s null rate: {}%".format(column, round(null_rate, 2)))
Age's null rate: 43.59% IMDb's null rate: 20.69% Rotten Tomatoes's null rate: 81.98%
#wordcloud
# Word cloud of all show titles joined into a single text blob.
from wordcloud import WordCloud, STOPWORDS
text = ' '.join(df['Title'])
plt.rcParams['figure.figsize'] = (6,12)
wordcloud = WordCloud(background_color = 'black',colormap='vlag', width = 1200, height = 1200, max_words = 121).generate(text)
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
# One timeline scatter per platform (2x2 grid): a point for every
# (Year, Title) pair, brand-coloured when the show is on that platform.
col_names_online_media = ['Netflix', 'Prime Video', 'Disney+', 'Hulu']
col_names_rating_sites = ['IMDb', 'Rotten Tomatoes']
# Two-colour palette per platform: grey for "not on it", brand colour for "on it".
# (Renamed from the original misspelled 'pallete_dict'; used only in this cell.)
palette_dict = {
    'Netflix': ['#A9A9A9', '#E50914'],
    'Prime Video': ['#A9A9A9', '#00A8E1'],
    'Hulu' : ['#A9A9A9', '#3DBB3D'],
    'Disney+' : ['#A9A9A9', '#113CCF']
}
fig = plt.figure(figsize=[20,10])
sns.set_style('dark')
for col_name, num in zip(col_names_online_media,
                         range(1, len(col_names_online_media) + 1)):
    ax = fig.add_subplot(2, 2, num)
    sns.scatterplot(x="Year", y="Title",
                    palette=palette_dict[col_name],
                    hue=col_name,
                    data=df.sort_values(by=[col_name, 'Title']),
                    ax=ax)
    # BUG FIX: the original bare `sns.despine` (no parentheses) was just an
    # attribute reference and never executed; call it so spines are removed.
    sns.despine()
    ax.set_title('TV Shows on ' + col_name, fontsize=25)
    # Show only the "on this platform" legend entry.
    # NOTE(review): handles[2] assumes seaborn emits [title, off, on] handles —
    # verify against the installed seaborn version.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(loc='upper left',
              frameon=None,
              edgecolor='black',
              fontsize=15,
              framealpha=0.2,
              handles=[handles[2]],
              labels=['On ' + col_name])
    ax.set_xlim(1900, 2022)
    ax.set(yticklabels=[])
    ax.set(ylabel=None, xlabel=None)
fig.tight_layout()
# Hide tick labels on the inner axes of the 2x2 grid.
for ax in fig.get_axes():
    ax.label_outer()
plt.show()
# Approximate global subscriber counts per service, in millions.
dict_subscribers_Millions = {'Netflix':183, 'Amazon Prime Video': 75, 'Disney+': 54.5, 'Hulu': 30 }
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import string
fig = plt.figure(figsize=[10,8])
sns.set_style('darkgrid')
# Bar chart of subscribers per service; palette colours follow dict order
# (Netflix red, Prime blue, Disney+ blue, Hulu green).
ax = sns.barplot(x=list(dict_subscribers_Millions.keys()), y=list(dict_subscribers_Millions.values()),
palette = ['#E50914', '#00A8E1', '#113CCF' , '#3DBB3D']
)
plt.ylabel('Subscribers in Millions')
# Save with a transparent background for embedding elsewhere.
plt.savefig('demo9.png', transparent=True)